In [1]:
# imports
import panel as pn
pn.extension('plotly')
import plotly.express as px
import pandas as pd
import hvplot.pandas
import matplotlib.pyplot as plt
import os
from pathlib import Path
from dotenv import load_dotenv
In [2]:
 # Read the Mapbox API key
load_dotenv()
map_box_api = os.getenv("mapbox")
In [3]:
# Read the census data into a Pandas DataFrame
file_path = Path("toronto_neighbourhoods_census_data.csv")
to_data = pd.read_csv(file_path, index_col="year")
to_data.head()
Out[3]:
neighbourhood single_detached_house apartment_five_storeys_plus movable_dwelling semi_detached_house row_house duplex apartment_five_storeys_less other_house average_house_value shelter_costs_owned shelter_costs_rented
year
2001 Agincourt North 3715 1480 0 1055 1295 195 185 5 200388 810 870
2001 Agincourt South-Malvern West 3250 1835 0 545 455 105 425 0 203047 806 892
2001 Alderwood 3175 315 0 470 50 185 370 0 259998 817 924
2001 Annex 1060 6090 5 1980 605 275 3710 165 453850 1027 1378
2001 Banbury-Don Mills 3615 4465 0 240 380 15 1360 0 371864 1007 1163
In [4]:
# Calculate the sum number of dwelling types units per year (hint: use groupby)
to_data_mod= to_data.drop(columns=['average_house_value','shelter_costs_owned','shelter_costs_rented'])
to_sum_dwellings = to_data_mod.groupby(['year']).sum()
to_sum_dwellings.head()
Out[4]:
single_detached_house apartment_five_storeys_plus movable_dwelling semi_detached_house row_house duplex apartment_five_storeys_less other_house
year
2001 300930 355015 75 90995 52355 23785 116900 3040
2006 266860 379400 165 69430 54690 44095 162850 1335
2011 274940 429220 100 72480 60355 44750 163895 2165
2016 269680 493270 95 71200 61565 48585 165575 2845
In [5]:
# Save the dataframe as a csv file
to_sum_dwellings.to_csv('to_sum_dwellings.csv')
In [6]:
# Helper create_bar_chart function
def create_bar_chart(data, title, xlabel, ylabel, color):
    """
    Create a barplot based in the data argument.
    """
    
    fig = plt.figure()
    bar_chart = data.plot.bar(color=color)
    bar_chart.set_xlabel(xlabel)
    bar_chart.set_ylabel(ylabel)
    bar_chart.set_title(title)
    plt.show()
    plt.close(fig)
In [7]:
# Create a bar chart per year to show the number of dwelling types

# Bar chart for 2001
create_bar_chart(to_sum_dwellings.loc[2001], "Dwelling Types in Toronto in 2001", "2001", "Dwelling Types Units", "red") 

# Bar chart for 2006
create_bar_chart(to_sum_dwellings.loc[2006], "Dwelling Types in Toronto in 2006", "2006", "Dwelling Types Units", "blue") 


# Bar chart for 2011
create_bar_chart(to_sum_dwellings.loc[2011], "Dwelling Types in Toronto in 2011", "2011", "Dwelling Types Units", "orange") 


# Bar chart for 2016
create_bar_chart(to_sum_dwellings.loc[2016], "Dwelling Types in Toronto in 2016", "2016", "Dwelling Types Units", "magenta") 
In [8]:
# Calculate the average monthly shelter costs for owned and rented dwellings
avg_monthly_costs =to_data[['shelter_costs_owned', 'shelter_costs_rented']].groupby(['year']).mean()
avg_monthly_costs
Out[8]:
shelter_costs_owned shelter_costs_rented
year
2001 846.878571 1085.935714
2006 1316.800000 925.414286
2011 1448.214286 1019.792857
2016 1761.314286 1256.321429
In [9]:
# Helper create_line_chart function
def create_line_chart(data, title, xlabel, ylabel, color):
    """
    Create a line chart based in the data argument.
    """
    fig = plt.figure()
    line_chart = data.plot.line(color=color)
    line_chart.set_xlabel(xlabel)
    line_chart.set_ylabel(ylabel)
    line_chart.set_title(title)
    plt.show()
    plt.close(fig)
In [10]:
# Create two line charts, one to plot the monthly shelter costs for owned dwelleing and other for rented dwellings per year

# Line chart for owned dwellings
create_line_chart(avg_monthly_costs['shelter_costs_owned'], 'Average Monthly Shelter Costs for Owned Dwellings in Toronto','Year', 'Average Monthly Shelter Costs', 'blue') 

# Line chart for rented dwellings
create_line_chart(avg_monthly_costs['shelter_costs_rented'], 'Average Monthly Shelter Costs for Owned Dwellings in Toronto','Year', 'Average Monthly Shelter Costs', 'orange') 
In [11]:
# Calculate the average house value per year
average_house_value = to_data['average_house_value'].groupby('year').mean()
average_house_value
Out[11]:
year
2001    289882.885714
2006    424059.664286
2011    530424.721429
2016    664068.328571
Name: average_house_value, dtype: float64
In [12]:
# Plot the average house value per year as a line chart
create_line_chart(average_house_value, 'Average House Value in Toronto', 'Year', 'Avg. House Value', 'blue')
In [13]:
# Create a new DataFrame with the mean house values by neighbourhood per year
avg_value_by_neighbourhood = to_data.filter(['year','neighbourhood','average_house_value'], axis=1)
avg_value_by_neighbourhood.reset_index(inplace=True)
avg_value_by_neighbourhood.head(10)
Out[13]:
year neighbourhood average_house_value
0 2001 Agincourt North 200388
1 2001 Agincourt South-Malvern West 203047
2 2001 Alderwood 259998
3 2001 Annex 453850
4 2001 Banbury-Don Mills 371864
5 2001 Bathurst Manor 304749
6 2001 Bay Street Corridor 257404
7 2001 Bayview Village 327644
8 2001 Bayview Woods-Steeles 343535
9 2001 Bedford Park-Nortown 565304
In [14]:
# Use hvplot to create an interactive line chart of the average house value per neighbourhood
# The plot should have a dropdown selector for the neighbourhood
avg_value_by_neighbourhood.rename(columns = {'year':'Year', 'average_house_value':'Avg. House Value'}, inplace = True)
avg_value_by_neighbourhood.hvplot.line(x='Year', y='Avg. House Value', groupby='neighbourhood')
Out[14]:
In [15]:
# Fetch the data of all dwelling types per year
all_dwelling_types = to_data
all_dwelling_types.reset_index(inplace=True) 
all_dwelling_types.head(10)
Out[15]:
year neighbourhood single_detached_house apartment_five_storeys_plus movable_dwelling semi_detached_house row_house duplex apartment_five_storeys_less other_house average_house_value shelter_costs_owned shelter_costs_rented
0 2001 Agincourt North 3715 1480 0 1055 1295 195 185 5 200388 810 870
1 2001 Agincourt South-Malvern West 3250 1835 0 545 455 105 425 0 203047 806 892
2 2001 Alderwood 3175 315 0 470 50 185 370 0 259998 817 924
3 2001 Annex 1060 6090 5 1980 605 275 3710 165 453850 1027 1378
4 2001 Banbury-Don Mills 3615 4465 0 240 380 15 1360 0 371864 1007 1163
5 2001 Bathurst Manor 2405 1550 0 130 130 375 745 0 304749 843 1052
6 2001 Bay Street Corridor 10 7575 0 0 15 0 240 0 257404 1218 1142
7 2001 Bayview Village 2170 630 0 170 765 15 640 0 327644 1197 1164
8 2001 Bayview Woods-Steeles 1650 1715 0 925 105 10 170 5 343535 1212 1018
9 2001 Bedford Park-Nortown 4985 2080 0 45 40 210 1235 15 565304 933 1491
In [16]:
# Use hvplot to create an interactive bar chart of the number of dwelling types per neighbourhood
# The plot should have a dropdown selector for the neighbourhood
all_dwelling_types.rename(columns = {'year':'Year'}, inplace = True)
all_dwelling_types.hvplot.bar(x='Year', ylabel= "Dwelling Type Units", groupby='neighbourhood', rot=90,ylim=(0,4000), yticks=5, height=400)
Out[16]:
In [17]:
# Getting the data from the top 10 expensive neighbourhoods
mean_values_neighbourhood = all_dwelling_types.groupby('neighbourhood').mean()[['single_detached_house', 'apartment_five_storeys_plus','movable_dwelling','semi_detached_house','row_house','duplex','apartment_five_storeys_less','other_house','average_house_value','shelter_costs_owned','shelter_costs_rented']]
mean_values_neighbourhood= mean_values_neighbourhood.sort_values(by='average_house_value', ascending=False)
top_ten_neighbourhoods= mean_values_neighbourhood.head(10)
top_ten_neighbourhoods.reset_index(inplace=True)
top_ten_neighbourhoods
Out[17]:
neighbourhood single_detached_house apartment_five_storeys_plus movable_dwelling semi_detached_house row_house duplex apartment_five_storeys_less other_house average_house_value shelter_costs_owned shelter_costs_rented
0 Bridle Path-Sunnybrook-York Mills 2260.00 331.25 0.00 36.25 90.00 25.0 40.00 0.00 1526485.75 2360.75 2321.75
1 Forest Hill South 1742.50 2031.25 1.25 61.25 45.00 75.0 1027.50 3.75 1195992.50 1781.00 1313.75
2 Lawrence Park South 3472.50 773.75 0.00 126.25 38.75 225.0 966.25 16.25 1094027.75 1954.00 1372.75
3 Rosedale-Moore Park 2498.75 4641.25 0.00 486.25 245.00 327.5 1618.75 2.50 1093640.00 1909.75 1537.25
4 St.Andrew-Windfields 3225.00 1670.00 0.00 185.00 552.50 97.5 586.25 5.00 999107.00 1880.25 1384.50
5 Casa Loma 916.25 2310.00 0.00 288.75 201.25 162.5 1192.50 2.50 981064.25 1873.75 1547.75
6 Bedford Park-Nortown 4865.00 1981.25 0.00 43.75 57.50 287.5 1275.00 88.75 930415.25 1786.75 1255.00
7 Forest Hill North 1488.75 3392.50 0.00 12.50 16.25 82.5 402.50 1.25 851680.50 1722.75 1245.50
8 Kingsway South 2326.25 576.25 0.00 66.25 48.75 20.0 336.25 2.50 843234.25 1736.75 1622.00
9 Yonge-St.Clair 565.00 3948.75 0.00 425.00 212.50 172.5 1308.75 6.25 813220.25 1680.75 1369.00
In [18]:
# Plotting the data from the top 10 expensive neighbourhoods
top_ten_neighbourhoods.rename(columns = {'neighbourhood':'Neighbourhood', 'average_house_value':'Avg. House Value'},inplace=True)
top_ten_neighbourhoods.hvplot.bar(x='Neighbourhood', y= "Avg. House Value", rot=90, height=500)
C:\Users\Owner\AppData\Local\Temp\ipykernel_2828\815796198.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_ten_neighbourhoods.rename(columns = {'neighbourhood':'Neighbourhood', 'average_house_value':'Avg. House Value'},inplace=True)
Out[18]:
In [19]:
# Load neighbourhoods coordinates data
file_path = Path("toronto_neighbourhoods_coordinates.csv")
df_neighbourhood_locations = pd.read_csv(file_path)
df_neighbourhood_locations.head(5)
Out[19]:
neighbourhood lat lon
0 Agincourt North 43.805441 -79.266712
1 Agincourt South-Malvern West 43.788658 -79.265612
2 Alderwood 43.604937 -79.541611
3 Annex 43.671585 -79.404001
4 Banbury-Don Mills 43.737657 -79.349718
In [20]:
# Calculate the mean values for each neighborhood
avg_values_neighbourhood= all_dwelling_types.groupby('neighbourhood').mean()[['single_detached_house', 'apartment_five_storeys_plus','movable_dwelling','semi_detached_house','row_house','duplex','apartment_five_storeys_less','other_house','average_house_value','shelter_costs_owned','shelter_costs_rented']]
avg_values_neighbourhood.reset_index(inplace=True)
avg_values_neighbourhood.head(5)
Out[20]:
neighbourhood single_detached_house apartment_five_storeys_plus movable_dwelling semi_detached_house row_house duplex apartment_five_storeys_less other_house average_house_value shelter_costs_owned shelter_costs_rented
0 Agincourt North 3435.00 1947.50 2.50 863.75 1406.25 512.50 547.50 10.00 329811.5 1109.00 983.50
1 Agincourt South-Malvern West 2897.50 2180.00 1.25 375.00 456.25 523.75 628.75 32.50 334189.0 1131.25 985.00
2 Alderwood 2903.75 302.50 1.25 503.75 76.25 302.50 502.50 1.25 427922.5 1166.75 1003.25
3 Annex 751.25 7235.00 1.25 1375.00 613.75 355.00 4605.00 83.75 746977.0 1692.75 1315.25
4 Banbury-Don Mills 3572.50 5388.75 1.25 273.75 626.25 32.50 1340.00 0.00 612039.0 1463.50 1242.75
In [21]:
# Join the average values with the neighbourhood locations
df_combined = pd.merge(df_neighbourhood_locations, avg_values_neighbourhood, on="neighbourhood")
df_combined.head(5)
Out[21]:
neighbourhood lat lon single_detached_house apartment_five_storeys_plus movable_dwelling semi_detached_house row_house duplex apartment_five_storeys_less other_house average_house_value shelter_costs_owned shelter_costs_rented
0 Agincourt North 43.805441 -79.266712 3435.00 1947.50 2.50 863.75 1406.25 512.50 547.50 10.00 329811.5 1109.00 983.50
1 Agincourt South-Malvern West 43.788658 -79.265612 2897.50 2180.00 1.25 375.00 456.25 523.75 628.75 32.50 334189.0 1131.25 985.00
2 Alderwood 43.604937 -79.541611 2903.75 302.50 1.25 503.75 76.25 302.50 502.50 1.25 427922.5 1166.75 1003.25
3 Annex 43.671585 -79.404001 751.25 7235.00 1.25 1375.00 613.75 355.00 4605.00 83.75 746977.0 1692.75 1315.25
4 Banbury-Don Mills 43.737657 -79.349718 3572.50 5388.75 1.25 273.75 626.25 32.50 1340.00 0.00 612039.0 1463.50 1242.75
In [22]:
# Create a scatter mapbox to analyze neighbourhood info
px.set_mapbox_access_token(map_box_api)
map_plot = px.scatter_mapbox(
    df_combined,
    lat='lat', 
    lon='lon', 
    size="average_house_value",
    color="average_house_value",
    color_continuous_scale=px.colors.cyclical.IceFire, 
    size_max=15, 
    zoom=9, 
    hover_name="neighbourhood", 
    hover_data= ["shelter_costs_owned", 
                 "shelter_costs_rented",
                 "single_detached_house",
                 "apartment_five_storeys_plus",
                 "movable_dwelling",
                 "semi_detached_house",
                 "row_house",
                 "duplex",
                 "apartment_five_storeys_less",
                 "other_house"], 
    height=600
)

# Display the map
map_plot.show()
In [23]:
# Create a bar chart row facet to plot the average house values for all Toronto's neighbourhoods per year
fig = px.bar(
    avg_value_by_neighbourhood, 
    x="neighbourhood", 
    y="Avg. House Value", 
    facet_row="Year", 
    color="Avg. House Value",
    color_continuous_scale=px.colors.cyclical.IceFire, 
    height=900, width=950,   
    title="Average House Values in Toronto Per Neighbourhood"
)

fig.show()